library(data.table)
library(ggplot2)
library(lfe)
library(stringr)
library(reldist)
library(scales)
library(Hmisc)
library(readstata13)

full <- fread('../data/all_records07-17wcfm.csv') # ENTIRE HMDA
avery <- data.table(read.dta13('../data/avery_panel.dta')) # AVERY FILE
bank_data <- fread('../data/bankdata_conventional.csv') # Bank data

hmda <- full[loan_type == 1 & loan_purpose %in% c(1,3) & property_type == 1 & owner_occupancy == 1 & lien_status == 1]
hmda[TYPE %in% c(40,41) ,type := 'S']
hmda[TYPE %in% 10:14    ,type := 'B']
hmda<-hmda[type == 'B']

hmda[action_taken %in% c(3,7), approval := 0]
hmda[action_taken %in% c(1,2), approval := 1]
hmda <- hmda[!is.na(hmda$approval),]

hmda[, conf.pct := loan_amount / conforming_limit]
hmda[, jumbo := as.integer(conf.pct>1.005)]

temp <- avery[,c('year','respondent_id','agency_code','ENTITY')]

hmda <- merge(hmda, temp,by.x=c('year','respondent_id','agency_code'),by.y=c('year','respondent_id','agency_code'),all.x=F,all.y=F)
hmda[, rssdid := ENTITY] 

bank.approval <- hmda[, .(approval_rate = mean(approval, na.rm = T)), by = c('year','rssdid')]
bankdata <- merge(bank_data, bank.approval, by.x=c('year','rssdid'),by.y=c('year','rssdid'),all.x=F,all.y=F)

bankdata <- bankdata[,cr.temp := cr/100]
bankdata$cr <- NULL
bankdata[,cr := cr.temp]


# Figure 5A -- 
approval.xs.reg <- lm(approval_rate ~  log(total_assets) + log(income) + log(loan_amount) + I(total_deposits / total_assets) + noncore_funding + core_deposits+ as.factor(year) ,data=bankdata,weights=bankdata$total_volume)

cr.xs.reg   <- lm(cr   ~  log(total_assets) + log(income) + log(loan_amount) + I(total_deposits / total_assets) + noncore_funding + core_deposits+ as.factor(year) ,data=bankdata,weights=bankdata$total_volume)

bankdata[,approval.xs.resid := approval_rate - predict(approval.xs.reg,bankdata)]
bankdata[,cr.xs.resid   := cr   - predict(cr.xs.reg,bankdata)]
bankdata[,cr.resid.xs.bin := ave(cr.xs.resid,cut2(cr.xs.resid,g=25),na.rm=T)]

temp.model <- lm(approval.xs.resid ~ cr.xs.resid,data=bankdata,weights=bankdata$total_originations)
qq <- bankdata[,j=list(m = weighted.mean(approval.xs.resid,w=total_originations,na.rm=T),n = sum(total_originations,na.rm=T)),by=cr.resid.xs.bin]
qq[,cr.xs.resid := cr.resid.xs.bin]
qq[,approval.predict := predict(temp.model,qq,interval = 'confidence')[,'fit']]
qq[,approval.predict.l := predict(temp.model,qq,interval = 'confidence')[,'lwr']]
qq[,approval.predict.u := predict(temp.model,qq,interval = 'confidence')[,'upr']]

ggplot(qq) + geom_point(aes(x=cr.resid.xs.bin,y=m,size=n)) + theme_bw() + theme(legend.position = 'none') + xlab('CR % (residual)') + ylab('Approval % (residual)') + 
scale_x_continuous(labels = percent) + scale_y_continuous(labels = percent)  + geom_line(aes(x=cr.resid.xs.bin,y=approval.predict),linetype = 'dashed') + geom_ribbon(aes(x=cr.resid.xs.bin,ymin=approval.predict.l,ymax=approval.predict.u),alpha=.25)  
ggsave('5a_approval_residual.png',height=3,width=5,units = 'in')


# Figure 5B -- 
# MAke sure we can identify everything...
#bankdata[,n.years := .N,by='rssdid']
#d.in.wi <- d.in[!(rssdid %in% c('5050028','1632','2938','4536084'))]
approval.wi.reg <- felm(approval_rate~  log(total_assets) + log(income) + log(loan_amount) + I(total_deposits / total_assets) + noncore_funding+ core_deposits | as.factor(year) + as.factor(rssdid) ,data=bankdata,weights=bankdata$total_volume)
cr.wi.reg   <- felm(cr   ~  log(total_assets) + log(income) + log(loan_amount) + I(total_deposits / total_assets) + noncore_funding+ core_deposits | as.factor(year) + as.factor(rssdid) ,data=bankdata,weights=bankdata$total_volume)

dt <- data.table(approval.resid = approval.wi.reg$residuals,cr.resid = cr.wi.reg$residuals,ww = approval.wi.reg$weights)
temp.model <- lm(approval.resid.approval_rate ~ cr.resid.cr,data=dt,weights=dt$ww)
dt[,cr.resid.bin := ave(cr.resid.cr,cut2(cr.resid.cr,g=25),na.rm=T) ]

qq <- dt[,j=list(m = weighted.mean(approval.resid.approval_rate,w=ww,na.rm=T),n = sum(ww,na.rm=T)),by=cr.resid.bin]
qq[,cr.resid.cr := cr.resid.bin]
qq[,approval.predict := predict(temp.model,qq,interval = 'confidence')[,'fit']]
qq[,approval.predict.l := predict(temp.model,qq,interval = 'confidence')[,'lwr']]
qq[,approval.predict.u := predict(temp.model,qq,interval = 'confidence')[,'upr']]
ggplot(qq) + geom_point(aes(x=cr.resid.bin,y=m,size=n)) + theme_bw() + theme(legend.position = 'none') + xlab('CR % (residual)') + ylab('Approval % (residual)') + 
scale_x_continuous(labels = percent) + scale_y_continuous(labels = percent) + geom_line(aes(x=cr.resid.bin,y=approval.predict),linetype = 'dashed') + geom_ribbon(aes(x=cr.resid.bin,ymin=approval.predict.l,ymax=approval.predict.u),alpha=.25)  
ggsave('5b_approval_residual.png',height=3,width=5,units = 'in')


##new session

